/*
Project:				Perry Intergenerational
Author:					Victor Ronda
Original Date:			January 21, 2022
*/



*-------------------------------------------------------------------------------*
*  Computing the Treatment Effect Estimates and p-values for Main Participants  *
*-------------------------------------------------------------------------------*


*------------------------------- Preamble --------------------------------------*

clear all
set more off
set matsize 10000
set maxvar 32767, permanently
version 15.0


*** Setting environment and filepaths ***
global klmshare : env klmshare
global klmperry : env klmperry
global data /share/klmperry/TE/data
global emp /share/klmperry/multigen_revision/Empirics

*-------------------------------------------------------------------------------*

*** Load penalized logit and penalized probit programs ***
do $emp/Scripts/penalized_logit.do

*--------------------------------- Globals -------------------------------------*

global xvars iq ses male mwork
global sxvars iq ses mwork

global depvarlist fbirth_55 fbirth20_35 any_outwed_55 any_cohab smpar10_2 dailyread_child_55 rhs_55 col_55 emp110_3 earn110_3 s_mjail_any50 s_marr_vio50 s_fpar_vio50 gint postrait1 grit risktaking stablemar_21_40 earn_21_40 lat_health lat_health80


use $data//Cleaned_data/perry_participants.dta, clear
cd "$emp/Results"

foreach depvar in $depvarlist {
replace `depvar'=. if biochild==0 | biochild==.
}

*-------------------------------------------------------------------------------*
*        Computing the Treatment Effect Estimates           					*
*-------------------------------------------------------------------------------*

program simx, rclass




cap gen streat = treat


*Compute a score for propensity of receiving treatment:
cap penlogit "streat" "$xvars" "" "predict pd, pr"

*Compute a score for propensity of being interviewed given the treatment status:
forval v = 0/1 {
		cap su interviewed00 if streat==`v'
		if r(mean)==1 {
			cap gen pi`v' = 1 if interviewed00==1
		}
		else {
			cap penlogit "interviewed00" "$xvars" "streat==`v'" "predict pi`v' if interviewed00==1, pr"
		}
	}


*** Computing treatment effect estimates and t-statistics for each outcome variable ***

foreach depvar in $depvarlist {

//locals for gender of the participant
local pm0 "0"
local pm1 "1"
local pm2 "0,1"

	
	local dvar "`depvar'"
	cap gen nmiss = (`dvar' <.) //indicator of non-missing data for the dependent variable

*** For AIPW estimates, generate non-missing-ness probabilities and conditional expectations of the outcome ***	
*Compute a score for propensity of having a non-missing outcome given the treatment and interview:	
	forval v = 0/1 {
		cap su nmiss if streat==`v' & interviewed00==1 
		if r(mean) == 1{
			cap gen pn`v' = 1 if nmiss==1 & streat==`v' 
		}
		else {
			cap penlogit "nmiss" "$xvars" "streat==`v' & interviewed00==1" "predict pn`v' if nmiss==1 & streat==`v' , pr"
		}
	}

*Inverse probability weights		
cap gen a1 = streat/(pn1 * pi1 * pd) if nmiss==1 & streat==1
cap replace a1 = 0 if nmiss==1 & streat==0 
cap gen a0 = (1 - streat)/(pn0 * pi0 * (1 - pd)) if nmiss==1 & streat==0
cap replace a0 = 0 if nmiss==1 & streat==1 
cap gen a = a1 + a0

*OLS-based conditional expectations of the outcome to use in the AIPW estimator
local pm0 "0"
local pm1 "1"
forval i = 0/1 {
	forval v = 0/1 {
		cap count if inlist(male,`pm`i'') & `dvar' <. & streat==`v' & nmiss==1
		cap local count_obs = r(N)
		if `count_obs' > 3 {
			cap reg `dvar' $sxvars if inlist(male,`pm`i'') & nmiss==1 & streat==`v' 
			cap predict `dvar'_aipw_`v'_`i' if inlist(male,`pm`i'') & inlist(nmiss,0,1) 
		}
		else if `count_obs' >= 1 & `count_obs' <= 3 {
			 cap su `dvar' if inlist(male,`pm`i'') & nmiss==1 & streat==`v' 
			 cap gen `dvar'_aipw_`v'_`i' = r(mean) if inlist(male,`pm`i'') & inlist(nmiss,0,1) 
		}
		else {
			cap gen `dvar'_aipw_`v'_`i' = . if inlist(male,`pm`i'') & inlist(nmiss,0,1)
		}
		
	}
}
cap gen `dvar'_aipw_1 = `dvar'_aipw_1_1 if male==1 
cap replace `dvar'_aipw_1 = `dvar'_aipw_1_0 if male==0 
cap gen `dvar'_aipw_0 = `dvar'_aipw_0_1 if male==1 
cap replace `dvar'_aipw_0 = `dvar'_aipw_0_0 if male==0 

*AIPW estimator at the individual level, which will be averaged later
cap gen te_aipw = (`dvar'_aipw_1 + a1 * (`dvar' - `dvar'_aipw_1)) - (`dvar'_aipw_0 + a0 * (`dvar' - `dvar'_aipw_0)) if nmiss==1
cap replace te_aipw = `dvar'_aipw_1 - `dvar'_aipw_0 if nmiss==0 
//locals for gender of the participant
local pm0 "0"
local pm1 "1"
local pm2 "0,1"
forval i = 0/2 {

	cap su `dvar' if inlist(male,`pm`i'')  
	cap return scalar `depvar'_nmisscount_`i' = r(N)
	 
	cap su `dvar' if inlist(male,`pm`i'') & streat==0 
	cap return scalar `depvar'_cmean_`i' = r(mean)
	
	cap su `dvar' if inlist(male,`pm`i'') & streat==1 
	cap return scalar `depvar'_tmean_`i' = r(mean)
	
*** AIPW (augmented inverse probability weighting-based treatment effect estimate) ***

	cap reg te_aipw if inlist(male,`pm`i''), cluster(sid)
	cap return scalar `depvar'_aipwd_`i' = _b[_cons]
	if _rc==0 {
		if _b[_cons]==0 {
			cap return scalar `depvar'_aipwt_`i' = 0
		}
		else {
			cap return scalar `depvar'_aipwt_`i' = _b[_cons]/_se[_cons]
		}
	}



*** UDIM (unconditional difference in means) ***	
	
	cap su streat if inlist(male,`pm`i'') & nmiss==1 
	if r(mean) > 0 & r(mean) < 1 {
		cap reg `dvar' streat if inlist(male,`pm`i'') & nmiss==1, cluster(sid)
		cap return scalar `depvar'_udimd_`i' = _b[streat]
		if _rc==0 {
			if _b[streat]==0 {
				cap return scalar `depvar'_udimt_`i' = 0
			}
			else {
				cap return scalar `depvar'_udimt_`i' = _b[streat]/_se[streat]
			}
		}
	}
	
*** COLS (conditional ordinary least squares-based treatment effect estimate) ***
	
	cap su streat if inlist(male,`pm`i'') & nmiss==1 
	if r(mean) > 0 & r(mean) < 1 {
		cap reg `dvar' streat $xvars if inlist(male,`pm`i'') & nmiss==1, cluster(sid)
		cap return scalar `depvar'_colsd_`i' = _b[streat]
		if _rc==0 {
			if _b[streat]==0 {
				cap return scalar `depvar'_colst_`i' = 0
			}
			else {
				cap return scalar `depvar'_colst_`i' = _b[streat]/_se[streat]
			}
		}
	}

*** Lee bounds ***
* Note: need to install package leebounds first
		cap leebounds `dvar' streat if inlist(male,`pm`i''), select(interviewed00)
		cap mat leetable = r(table)
		if _rc==0 {
			cap return scalar `depvar'_lblld_`i' = leetable[1,1]
			if leetable[1,1]==0 {
				cap return scalar `depvar'_lbllt_`i' = 0
			}
			else {
				cap return scalar `depvar'_lbllt_`i' = leetable[1,1]/leetable[2,1]
			}
			
			cap return scalar `depvar'_lbuld_`i' = leetable[1,2]
			if leetable[1,2]==0 {
				cap return scalar `depvar'_lbult_`i' = 0
			}
			else {
				cap return scalar `depvar'_lbult_`i' = leetable[1,2]/leetable[2,2]
			}
		}	
	
	
}



foreach tempovar in nmiss pn0 pn1 a1 a0 a `dvar'_aipw_1_1 `dvar'_aipw_1_0 `dvar'_aipw_0_1 `dvar'_aipw_0_0 `dvar'_aipw_1 `dvar'_aipw_0 te_aipw {
	qui cap drop `tempovar'
}

}
	



foreach tempovar in pd pi*  {
	qui cap drop `tempovar'
}


*sum streat
cap drop streat 

end

global storeresults ""
foreach depvar in $depvarlist {
		forval i = 0/2 {
			foreach result in nmisscount cmean tmean udimd udimt colsd colst aipwd aipwt lblld lbllt lbuld lbult {
				global storeresults "$storeresults `depvar'_`result'_`i' = r(`depvar'_`result'_`i')"
		}
	}
}



*** Main Estimates ***
simulate $storeresults, seed(12345) reps(1) noisily saving($emp/Results/estimates_participants.dta, replace every(1)): simx

*** Estimates for Permutation Tests***
permute treat $storeresults, seed(12345) reps(1000) saving($emp/Results/estimates_perm_participants.dta, replace every(20)): simx

*** Estimates for Bootstrap Tests***
bootstrap $storeresults, seed(12345) cluster(id) idcluster(idnew) nodrop reps(1000) saving($emp/Results/estimates_boot_participants.dta, replace every(20)): simx


*-------------------------------------------------------------------------------*
*                         Computing Asymptotic P-Values                         *
*-------------------------------------------------------------------------------*

use $emp/Results/estimates_participants.dta, clear
drop *_nmisscount_* *_cmean_* *_tmean_*

foreach dvar in $depvarlist {
foreach estm in udim cols aipw {
forval i = 0/2 {

qui {

*Computing the asymptotic p-value based on asymptotic standard error
local aapval = normal(-abs(`dvar'_`estm't_`i'[1]))

*Computing the asymptotic p-value based on bootstrap standard error
su `dvar'_`estm'd_`i'
local abpval = normal(-abs(`dvar'_`estm'd_`i'[1]/r(sd)))
if normal(-abs(`dvar'_`estm'd_`i'[1]/r(sd)))==. {
local abpval = `aapval'
}

replace `dvar'_`estm'd_`i' = `aapval' if _n==1
replace `dvar'_`estm't_`i' = `abpval' if _n==1

}

}
}
}

keep if _n==1

save $emp/Results/asym_pval_participants, replace


*-------------------------------------------------------------------------------*
*                         Computing Permutation P-Values                         *
*-------------------------------------------------------------------------------*
use $emp/Results/estimates_participants.dta, clear
append using $emp/Results/estimates_perm_participants.dta



foreach dvar in $depvarlist {
    foreach estm in udim cols aipw {
		forval i = 0/2 {

qui{
count if _n > 1 & `dvar'_`estm'd_`i' <.
local bootB = r(N)
count if _n > 1 & `dvar'_`estm'd_`i' >= `dvar'_`estm'd_`i'[1]
local p_r = (r(N) + 1)/`bootB'
count if _n > 1 & `dvar'_`estm'd_`i' <= `dvar'_`estm'd_`i'[1]
local p_l = (r(N) + 1)/`bootB'


replace `dvar'_`estm'd_`i' =  min(`p_l',`p_r',1) if _n==1

}
		}
}
}



keep if _n==1


save $emp/Results/perm_pval_participants, replace


*-------------------------------------------------------------------------------*
*                         Computing Bootstrap P-Values                         *
*-------------------------------------------------------------------------------*
use $emp/Results/estimates_participants.dta, clear
append using $emp/Results/estimates_boot_participants.dta


foreach dvar in $depvarlist {
foreach estm in aipw  udim cols {
forval i = 0/2 {

qui {
egen temp=sd(`dvar'_`estm'd_`i') if _n>1
egen `dvar'_`estm'se_`i'=mean(temp)
drop temp

*Computing the bootstrap p-value (type II p-value) based on unstudentized test statistic

count if _n > 1 & `dvar'_`estm'd_`i' <.
local bootB = r(N)
count if _n > 1 & `dvar'_`estm'd_`i' >= 0
local p_r = (r(N) + 1)/`bootB'
count if _n > 1 & `dvar'_`estm'd_`i' <= 0
local p_l = (r(N) + 1)/`bootB'

*Computing the bootstrap p-value (percentile-t p-value) based on studentized test statistic

count if _n > 1 & ( (`dvar'_`estm'd_`i' - `dvar'_`estm'd_`i'[1])/(`dvar'_`estm'd_`i'/`dvar'_`estm't_`i') >= `dvar'_`estm't_`i'[1] | ((`dvar'_`estm'd_`i' - `dvar'_`estm'd_`i'[1])==0 & (`dvar'_`estm'd_`i' - `dvar'_`estm'd_`i'[1]) >= `dvar'_`estm't_`i'[1] & `dvar'_`estm'd_`i' <.) )
local q_r = (r(N) + 1)/`bootB'
count if _n > 1 & ( (`dvar'_`estm'd_`i' - `dvar'_`estm'd_`i'[1])/(`dvar'_`estm'd_`i'/`dvar'_`estm't_`i') <= `dvar'_`estm't_`i'[1] | ((`dvar'_`estm'd_`i' - `dvar'_`estm'd_`i'[1])==0 & (`dvar'_`estm'd_`i' - `dvar'_`estm'd_`i'[1]) <= `dvar'_`estm't_`i'[1] & `dvar'_`estm'd_`i' <.) )
local q_l = (r(N) + 1)/`bootB'

replace `dvar'_`estm'd_`i' =  min(`p_l',`p_r',1) if _n==1
replace `dvar'_`estm't_`i' =  min(`q_l',`q_r',1) if _n==1

}

}
}
}


keep if _n==1

save $emp/Results/boot_pval_participants, replace

